{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {}
   },
   "source": [
    "# Homo LR Demo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {}
   },
   "source": [
    "In this demo, we use the [homo_logistic_regression](https://github.com/FederatedAI/FATE/tree/master/examples/federatedml-1.x-examples/homo_logistic_regression) example. It is:\n",
    "* A homogeneous federated machine learning example, in which both parties share the same attributes but hold different samples;\n",
    "* Based on the breast cancer data originally from Kaggle: https://www.kaggle.com/uciml/breast-cancer-wisconsin-data\n",
    "* For ease of demonstration, both parties use the same FATE cluster: 10000. The underlying mechanism is unchanged, however: each side treats its collaborating party as a remote party that it reaches over the network.\n",
    "\n",
    "Step 0. Prepare the library to manage federated machine learning."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {}
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import time\n",
    "import os\n",
    "import requests\n",
    "\n",
    "from fml_manager import *\n",
    "\n",
    "manager = FMLManager()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## For more details about the FMLManager, please refer to this [document](https://kubefate.readthedocs.io/README.html)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {}
   },
   "source": [
    "Step 1. Upload the guest, host and test data. Because we use the same cluster for this demo, we load all of the data in the same notebook. If a different party acted as the host, the host data would have to be loaded in that party's notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upload the guest, host and test CSV files. Each load_data response carries a\n",
    "# jobId; a QueryCondition is kept per upload so its status can be queried below.\n",
    "response = manager.load_data(url='./data/breast_homo_guest.csv', namespace='homo_breast_guest', table_name='homo_breast_guest', work_mode=1, head=1, partition=10)\n",
    "output = json.loads(response.content)\n",
    "print(output)\n",
    "guest_job_id = output['jobId']\n",
    "guest_query_condition = QueryCondition(job_id=guest_job_id)\n",
    "\n",
    "response = manager.load_data(url='./data/breast_homo_host.csv', namespace='homo_breast_host', table_name='homo_breast_host', work_mode=1, head=1, partition=10)\n",
    "output = json.loads(response.content)\n",
    "host_job_id = output['jobId']\n",
    "host_query_condition = QueryCondition(job_id=host_job_id)\n",
    "\n",
    "response = manager.load_data(url='./data/breast_homo_test.csv', namespace='homo_breast_test', table_name='homo_breast_test', work_mode=1, head=1, partition=10)\n",
    "output = json.loads(response.content)\n",
    "test_job_id = output['jobId']\n",
    "test_query_condition = QueryCondition(job_id=test_job_id)\n",
    "\n",
    "\n",
    "# Query the status of all three upload jobs (guest, host and test).\n",
    "manager.query_job_status(guest_query_condition)\n",
    "manager.query_job_status(host_query_condition)\n",
    "manager.query_job_status(test_query_condition)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {}
   },
   "source": [
    "Step 2. Create the steps DSL and configuration of each step for training."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {}
   },
   "outputs": [],
   "source": [
    "# DSL: three components chained as dataio_0 -> homo_lr_0 -> evaluation_0\n",
    "data_io = (\n",
    "    ComponentBuilder(name='dataio_0', module='DataIO')\n",
    "    .add_input_data('args.train_data')\n",
    "    .add_output_data('train')\n",
    "    .add_output_model('dataio')\n",
    "    .build()\n",
    ")\n",
    "\n",
    "homo_lr = (\n",
    "    ComponentBuilder(name='homo_lr_0', module='HomoLR')\n",
    "    .add_input_train_data('dataio_0.train')\n",
    "    .add_output_data('train')\n",
    "    .add_output_model('homolr')\n",
    "    .build()\n",
    ")\n",
    "\n",
    "evaluation = (\n",
    "    ComponentBuilder(name='evaluation_0', module='Evaluation', need_deploy=False)\n",
    "    .add_input_data('homo_lr_0.train')\n",
    "    .add_output_data('evaluate')\n",
    "    .build()\n",
    ")\n",
    "\n",
    "pipeline = Pipeline(data_io, homo_lr, evaluation)\n",
    "\n",
    "# Configuration: guest, host and arbiter all use party 10000 in this demo\n",
    "initiator = Initiator(role='guest', party_id=10000)\n",
    "\n",
    "job_parameters = JobParametersBuilder().with_work_mode(1).build()\n",
    "\n",
    "role = (\n",
    "    RoleBuilder()\n",
    "    .add_guest(party_id=10000)\n",
    "    .add_host(party_id=10000)\n",
    "    .add_arbiter(party_id=10000)\n",
    "    .build()\n",
    ")\n",
    "\n",
    "# Host-side override for the evaluation_0 component\n",
    "eval_config = {'need_run': [False]}\n",
    "\n",
    "role_parameters = (\n",
    "    RoleParametersBuilder()\n",
    "    .add_guest_train_data(namespace='homo_breast_guest', name='homo_breast_guest')\n",
    "    .add_host_train_data(namespace='homo_breast_host', name='homo_breast_host')\n",
    "    .add_host_module_config(module='evaluation_0', config=eval_config)\n",
    "    .build()\n",
    ")\n",
    "\n",
    "# Parameters passed to the homo_lr_0 component\n",
    "homo_lr_params = {\n",
    "    'penalty': 'L2',\n",
    "    'optimizer': 'sgd',\n",
    "    'eps': 1e-5,\n",
    "    'alpha': 0.01,\n",
    "    'max_iter': 10,\n",
    "    'converge_func': 'diff',\n",
    "    'batch_size': 500,\n",
    "    'learning_rate': 0.15,\n",
    "    'decay': 1,\n",
    "    'decay_sqrt': True,\n",
    "    'init_param': {\n",
    "        'init_method': 'zeros'\n",
    "    },\n",
    "    'encrypt_param': {\n",
    "        'method': 'Paillier'\n",
    "    },\n",
    "    'cv_param': {\n",
    "        'n_splits': 4,\n",
    "        'shuffle': True,\n",
    "        'random_seed': 33,\n",
    "        'need_cv': False\n",
    "    }\n",
    "}\n",
    "\n",
    "# Parameters passed to the dataio_0 component\n",
    "dataio_config = {\n",
    "    'with_label': True,\n",
    "    'label_name': 'y',\n",
    "    'label_type': 'int',\n",
    "    'output_format': 'dense'\n",
    "}\n",
    "\n",
    "algorithm_parameters = (\n",
    "    AlgorithmParametersBuilder()\n",
    "    .add_module_config(module='homo_lr_0', config=homo_lr_params)\n",
    "    .add_module_config(module='dataio_0', config=dataio_config)\n",
    "    .build()\n",
    ")\n",
    "\n",
    "config = Config(initiator, job_parameters, role, role_parameters, algorithm_parameters)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {}
   },
   "source": [
    "Step 3. Submit the training job to the GUEST cluster. It will notify and bring up the HOST cluster, and the two will train together."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Submit the training job and show the submission response once.\n",
    "response = manager.submit_job(pipeline.to_dict(), config.to_dict())\n",
    "manager.prettify(response, verbose=True)\n",
    "output = json.loads(response.content)\n",
    "\n",
    "# Query the job status using the jobId returned by the submission.\n",
    "job_id = output['jobId']\n",
    "query_condition = QueryCondition(job_id=job_id)\n",
    "manager.query_job_status(query_condition, max_tries=20)\n",
    "\n",
    "# model_id and model_version are read from the submission response;\n",
    "# the model output and prediction cells below use them.\n",
    "model_info = output['data']['model_info']\n",
    "model_id, model_version = model_info['model_id'], model_info['model_version']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {}
   },
   "outputs": [],
   "source": [
    "# Request the guest-side output of the homo_lr_0 model component.\n",
    "response = manager.model_output(\n",
    "    role='guest',\n",
    "    party_id='10000',\n",
    "    model_id=model_id,\n",
    "    model_version=model_version,\n",
    "    model_component='homo_lr_0.homolr:HomoLogisticRegression',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {}
   },
   "source": [
    "We can also try the offline prediction feature. Prediction also requires both parties to participate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {}
   },
   "outputs": [],
   "source": [
    "# Settings for the prediction job: horizontal (homo) model, initiated by the guest.\n",
    "is_vertical = False\n",
    "initiator_party_role = 'guest'\n",
    "initiator_party_id = '10000'\n",
    "work_mode = 1\n",
    "federated_roles = {'guest': [10000], 'host': [10000], 'arbiter': [10000]}\n",
    "\n",
    "# Guest and host both point at the test table uploaded earlier.\n",
    "guest_data_name = guest_data_namespace = 'homo_breast_test'\n",
    "host_data_name = host_data_namespace = 'homo_breast_test'\n",
    "\n",
    "response = manager.offline_predict_on_dataset(\n",
    "    is_vertical,\n",
    "    initiator_party_role,\n",
    "    initiator_party_id,\n",
    "    work_mode,\n",
    "    model_id,\n",
    "    model_version,\n",
    "    federated_roles,\n",
    "    guest_data_name,\n",
    "    guest_data_namespace,\n",
    "    host_data_name,\n",
    "    host_data_namespace,\n",
    ")\n",
    "print(response.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Click [here](/fateboard-10000/) to view jobs in FATE Board"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
